In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import calendar
In [2]:
amazon_df= pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/AMZN?period1=1656862000&period2=1688398000&interval=1d&events=history&includeAdjustedClose=true')
amazon_df.head()
Out[2]:
Date Open High Low Close Adj Close Volume
0 2022-07-05 107.599998 114.080002 106.320000 113.500000 113.500000 76583700
1 2022-07-06 113.209999 115.480003 112.010002 114.330002 114.330002 66958900
2 2022-07-07 113.849998 116.989998 113.489998 116.330002 116.330002 57872300
3 2022-07-08 114.599998 116.580002 113.690002 115.540001 115.540001 45719700
4 2022-07-11 114.080002 114.300003 110.870003 111.750000 111.750000 53487600

2) Cleaning data¶

In [3]:
amazon_df.isna().any()
Out[3]:
Date         False
Open         False
High         False
Low          False
Close        False
Adj Close    False
Volume       False
dtype: bool
In [4]:
amazon_df.isna().sum() #no null values
Out[4]:
Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64
In [5]:
amazon_df.dtypes
Out[5]:
Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object
In [6]:
amazon_df['new_Date']=pd.to_datetime(amazon_df['Date'],format='%Y-%m-%d')
amazon_df['new_Date']
Out[6]:
0     2022-07-05
1     2022-07-06
2     2022-07-07
3     2022-07-08
4     2022-07-11
         ...    
246   2023-06-27
247   2023-06-28
248   2023-06-29
249   2023-06-30
250   2023-07-03
Name: new_Date, Length: 251, dtype: datetime64[ns]

Separating year, month and day as separate columns¶

In [7]:
year=[]
month=[]
day=[]
dayofweek=[]
weekday=[]
for i in amazon_df['new_Date']:
  year.append(i.year)
  month.append(i.month)
  day.append(i.day)
  dayofweek.append(i.dayofweek)
  weekday.append(calendar.day_name[dayofweek[-1]])

amazon_df['YEAR']=year
amazon_df['MONTH'] = month
amazon_df['DAY']=day
amazon_df['DAY OF WEEK'] = dayofweek
amazon_df['WEEKDAY'] = weekday
In [8]:
amazon_df.head()
Out[8]:
Date Open High Low Close Adj Close Volume new_Date YEAR MONTH DAY DAY OF WEEK WEEKDAY
0 2022-07-05 107.599998 114.080002 106.320000 113.500000 113.500000 76583700 2022-07-05 2022 7 5 1 Tuesday
1 2022-07-06 113.209999 115.480003 112.010002 114.330002 114.330002 66958900 2022-07-06 2022 7 6 2 Wednesday
2 2022-07-07 113.849998 116.989998 113.489998 116.330002 116.330002 57872300 2022-07-07 2022 7 7 3 Thursday
3 2022-07-08 114.599998 116.580002 113.690002 115.540001 115.540001 45719700 2022-07-08 2022 7 8 4 Friday
4 2022-07-11 114.080002 114.300003 110.870003 111.750000 111.750000 53487600 2022-07-11 2022 7 11 0 Monday

3) What was the change in stock price over time?¶

In [9]:
plt.figure(figsize=(10,5))
plt.plot(amazon_df['new_Date'],amazon_df['Close'])
plt.title('Amazon Stock Price Over Time')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.show()

4) Visualize the change in a stock’s volume being traded, over time?¶

In [10]:
plt.figure(figsize=(10,5))
plt.plot(amazon_df['new_Date'],amazon_df['Volume'])
plt.title('Amazon Stock Volume traded Over Time')
plt.xlabel('Date')
plt.ylabel('Volume')
plt.show()

5 What was the moving average of various stocks?¶

In [11]:
amazon_df['MA50'] = amazon_df['Close'].rolling(window=50).mean()
amazon_df['MA200'] = amazon_df['Close'].rolling(window=200).mean()
plt.plot(amazon_df['Date'], amazon_df['Close'], label='Amazon')
plt.plot(amazon_df['Date'], amazon_df['MA50'], label='MA50')
plt.plot(amazon_df['Date'], amazon_df['MA200'], label='MA200')
plt.title('Amazon Stock Price with Moving Averages')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.legend()
plt.show()

6) What was the daily return average of a stock?¶

In [12]:
amazon_df['Daily Return'] = amazon_df['Close'].pct_change()
print('Daily return average of Amazon stock: ', round(amazon_df['Daily Return'].mean()*100, 2), '%')
Daily return average of Amazon stock:  0.09 %

7) Add a new column ‘Trend’ whose values are based on the 'Daily Return'.¶

In [13]:
amazon_df['Daily Return']
Out[13]:
0           NaN
1      0.007313
2      0.017493
3     -0.006791
4     -0.032803
         ...   
246    0.014529
247   -0.001084
248   -0.008834
249    0.019234
250   -0.001074
Name: Daily Return, Length: 251, dtype: float64
In [14]:
amazon_df['Trend'] = ['Up' if x >= 0 else 'Down' for x in amazon_df['Daily Return']]
amazon_df['Trend']
Out[14]:
0      Down
1        Up
2        Up
3      Down
4      Down
       ... 
246      Up
247    Down
248    Down
249      Up
250    Down
Name: Trend, Length: 251, dtype: object

8. Visualize trend frequency through a Pie Chart.¶

In [15]:
trend=amazon_df['Trend'].value_counts()
trend
Out[15]:
Up      126
Down    125
Name: Trend, dtype: int64
In [16]:
import plotly.express as px
from IPython.display import HTML
fig=px.pie(amazon_df,
           values=trend.values,
           names=trend.index,
           title='Amazon Stock Trend Frequency')
HTML(fig.to_html())
Out[16]:

9) What was the correlation between the daily returns of different stocks?¶

In [17]:
aapl_df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/AAPL?period1=1656862826&period2=1688398826&interval=1d&events=history&includeAdjustedClose=true')
goog_df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/GOOG?period1=1656862847&period2=1688398847&interval=1d&events=history&includeAdjustedClose=true')
msft_df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/MSFT?period1=1656862865&period2=1688398865&interval=1d&events=history&includeAdjustedClose=true')
amzn_df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/AMZN?period1=1656862000&period2=1688398000&interval=1d&events=history&includeAdjustedClose=true')
In [22]:
aapl_df.isna().sum()
Out[22]:
Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64
In [21]:
goog_df.isna().sum()
Out[21]:
Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64
In [23]:
msft_df.isna().sum()
Out[23]:
Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64
In [24]:
amzn_df.isna().sum()
Out[24]:
Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64
In [25]:
amzn_df['Daily Return'] = amzn_df['Close'].pct_change()
goog_df['Daily Return'] = goog_df['Close'].pct_change()
msft_df['Daily Return'] = msft_df['Close'].pct_change()
aapl_df['Daily Return'] = aapl_df['Close'].pct_change()
In [26]:
returns_df = pd.concat([amzn_df['Daily Return'], goog_df['Daily Return'], msft_df['Daily Return'], aapl_df['Daily Return']], axis=1)
returns_df.columns = ['Amazon', 'Google', 'Microsoft', 'Apple']
returns_df
Out[26]:
Amazon Google Microsoft Apple
0 NaN NaN NaN NaN
1 0.007313 0.011648 0.012783 0.009607
2 0.017493 0.035521 0.008227 0.023999
3 -0.006791 0.007229 -0.002757 0.004715
4 -0.032803 -0.030341 -0.011769 -0.014758
... ... ... ... ...
246 0.014529 -0.000672 0.018168 0.015059
247 -0.001084 0.017393 0.003826 0.006328
248 -0.008834 -0.008837 -0.002382 0.001797
249 0.019234 0.007999 0.016386 0.023103
250 -0.001074 -0.003389 -0.007488 -0.007785

251 rows × 4 columns

In [27]:
# calculate the correlation between daily returns of different stocks
corr_matrix = returns_df.corr()

# print the correlation matrix
print(corr_matrix)
             Amazon    Google  Microsoft     Apple
Amazon     1.000000  0.705563   0.715576  0.640828
Google     0.705563  1.000000   0.790127  0.705345
Microsoft  0.715576  0.790127   1.000000  0.727176
Apple      0.640828  0.705345   0.727176  1.000000
In [ ]: